Functional analysis

count_filtered_rel <- count_filtered %>%
  rownames_to_column(., "Genome") %>%
  mutate_at(vars(-Genome),~./sum(.))  %>%
  column_to_rownames(., "Genome")

#Run distillation
GIFTs <- distill(filtered_df,GIFT_db,genomecol=2,annotcol=c(9,10,19))
#GIFTs <- GIFTs[-c(9,10),]

# GIFTs_filtered <- GIFTs[rownames(GIFTs) %in% rownames(count_filtered_rel),]

#Aggregate bundle-level GIFTs into the compound level
GIFTs_elements <- to.elements(GIFTs,GIFT_db)

#Aggregate element-level GIFTs into the function level
GIFTs_functions <- to.functions(GIFTs_elements,GIFT_db)

#Aggregate function-level GIFTs into overall Biosynthesis, Degradation and Structural GIFTs
GIFTs_domains <- to.domains(GIFTs_functions,GIFT_db)

#Get community-weighed average GIFTs per sample
GIFTs_elements_community <- to.community(GIFTs_elements,count_filtered_rel,GIFT_db)
GIFTs_functions_community <- to.community(GIFTs_functions,count_filtered_rel,GIFT_db)
GIFTs_domains_community <- to.community(GIFTs_domains,count_filtered_rel,GIFT_db)
not_shared_rows <- setdiff(rownames(count_filtered_rel), rownames(GIFTs_elements)) #"EHA01909_bin.13" "EHA01928_bin.9" 
rows_to_remove <- c("EHA01909_bin.13", "EHA01928_bin.9")
count_filtered_rel <- count_filtered_rel[!rownames(count_filtered_rel) %in% rows_to_remove, ]
# There are 2 bins that are present in count_filtered_rel (555) but not GIFTs elements (553). I removed them for analysis, but I am questioning why they are not there, and if removing them is valid for analysis. 

#Convert traits into distance matrix
dist <- traits2dist(GIFTs_elements, method="gower")

functional_div_A <- hilldiv(data=count_filtered_rel,dist=dist)

# is the GIFTs elements the distance matrix from this documentation: hilldiv(data=counts,dist=dist)

#Q (Rao's Q); D_q (functional hill number, the effective number of equally abundant and functionally equally distinct species); MD_q (mean functional diversity per species, the effective sum of pairwise distances between a fixed species and all other species); FD_q (total functional diversity, the effective total functional distance between species of the assemblage).

ELSA QUESTION: There are 2 bins that are present in count_filtered_rel (555) but not GIFTs elements (553). I removed them for analysis, but I am questioning why they are not there, and if removing them is valid for analysis

Average functional diversities

alpha_div_func <-  t(functional_div_A) %>%
  as.data.frame() %>%
  tibble::rownames_to_column("sample") %>%
  merge(., all_metadata, by="sample")

table_mean_alpha_func <- alpha_div_func %>%
  group_by(region) %>% 
  summarise_at(.vars = names(.)[2],.funs = c(mean="mean", sd="sd"))

knitr::kable(table_mean_alpha_func, format = "html", full_width = F,col.names = c('Groups', 'Mean', 'SD'), digits = 3) %>%
  kable_styling(latex_options="scale_down")
Groups Mean SD
Daneborg 1.424 0.037
Ittoqqortoormii 1.456 0.061

Functional diversity plots

alpha_div_func %>%
  ggplot(aes(x = region, y = q1, group = region, color = region)) +
  geom_boxplot(alpha = 0.2, outlier.shape = NA, width = 0.3, show.legend = FALSE, coef = 0) +
  geom_jitter(width = 0.1, show.legend = TRUE) +
  theme(panel.background = element_blank(),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(size = 0.5, linetype = "solid", colour = "black")) +
  xlab("Region") + 
  ylab("Functional diversity")

Functional beta diversity calculations

beta_div_func <- hillpair(data=count_filtered_rel,dist=dist, q=1)
#### NMDS
hill_pair_dis_func <- hillpair(data=count_filtered_rel, dist=dist, q=1)

# Generate NMDS ordination
hill_pair_dis_func_nmds <- hill_pair_dis_func %>%
                select(first,second,C) %>% #based on dissimilarity metric C
                as.data.frame() %>%
        pivot_wider(names_from = first, values_from = C) %>%
                column_to_rownames(var = "second") %>%
                as.matrix() %>%
                as.dist() %>%
                metaMDS(.,trymax = 500, k=2, verbosity=FALSE) %>%
                vegan::scores() %>%
                as_tibble(., rownames = "sample")

#Add metadata
hill_pair_dis_func_nmds <- hill_pair_dis_func_nmds %>%
      left_join(all_metadata, by = join_by(sample == sample))

NMDS:

using script from https://github.com/anttonalberdi/hilldiv2

#Plot ordination
ggplot(hill_pair_dis_func_nmds, aes(x=NMDS1,y=NMDS2, color=region)) +
        geom_point(size=3) +
        scale_color_manual(values = c("#46edc8","#374d7c")) +
        theme_classic() +
        theme(legend.position="bottom", legend.box="vertical")

Using adapted original script

NMDS.centroids_13=aggregate(hill_pair_dis_func_nmds[,c(2:3)],by=list(hill_pair_dis_func_nmds$region),FUN=mean)
colnames(NMDS.centroids_13) <- c("region","NMDS1","NMDS2")
hill_pair_dis_func_nmds=merge(hill_pair_dis_func_nmds,NMDS.centroids_13,by="region")

centroids_13 <- hill_pair_dis_func_nmds  %>%
  group_by(region)  %>%
  summarize(NM1=mean(NMDS1.y), NM2=mean(NMDS2.y))

ggplot(hill_pair_dis_func_nmds, aes(x=NMDS1.x,y=NMDS2.x,colour=region)) +
  geom_point(aes(x=NMDS1.x,y=NMDS2.x),size=2) + 
  geom_segment(aes(x=NMDS1.y, y=NMDS2.y, xend=NMDS1.x, yend=NMDS2.x))+ 
  theme(axis.line = element_line(colour = "grey"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank(),
        axis.title=element_blank(),
        legend.text=element_text(size=14))+
  # facet_wrap(~ factor(type))+
  theme_classic()+
  labs( x = "NMDS1", y = "NMDS2")

Functional capacity of the MAGs

Create Merge table and GIFT elements for Ittoq dogs

Create Merge table and GIFT elements for Daneborg dogs

Region Daneborg: tSNE Phylum

Region Daneborg: tSNE Phylum

Region Ittoq: tSNE gifts

Region Daneborg: tSNE gifts